###############################################
## prism  weather data                       ##
###############################################

## Note: rgdal and gdalUtils packages are only available in the prevous R versions (R 4.2.0 and earlier)

rm(list=ls())
options(scipen=100,digits=10)
library(rgdal)
library(gdalUtils)
library(raster)
library(plyr)
library(dplyr)
library(sf)
library(tidyverse)
library(rgeos)
library(sm)
library(RcppEigen)
library(data.table)
library(xtable)
library(stargazer)
library(prism)
library(raster)
library(foreign)




##################################
##                              ##
## STEP1: download weather vars ##
##                              ##
##################################

# data source: http://prism.oregonstate.edu; time frame: 10/22-3/25, for years 13,14,...,19; 4km grid
# directory of downloaded weather data
setwd("data/raw data/weather")


# use -get_prism_dailys- download daily "tmean" and "ppt"  

options(prism.path = "tmean")
get_prism_dailys(type="tmean", minDate = "2013-10-22", maxDate = "2019-3-25", keepZip=FALSE)

options(prism.path = "ppt")
get_prism_dailys(type="ppt", minDate = "2013-10-22", maxDate = "2019-3-25", keepZip=FALSE)

# There is a bug in this API package for tdmean. Use the following step to fix it.
# use fn -get_prism_dailys1- download daily "tdmean"; 
# fn -get_prism_dailys1- is defined in "prism_utility_function.R" and "get_prism_daily_dtmean.R"                             

options(prism.path = "tdmean")
get_prism_dailys1(type="tdmean", minDate = "2013-10-22", maxDate = "2019-3-25", keepZip=FALSE)




##################################################
##                                              ##
## STEP2: project power plants' Lat/Long to map ##
##                                              ##
##################################################

# Extract power plants coordinates
setwd("data/raw data/PowerPlants_US_EIA_April2019")
data_initial <- read.dbf("PowerPlants_US_201904.dbf")
data=data_initial[c("Plant_Code","Latitude","Longitude")]
coordinates(data) <- c("Longitude","Latitude")


# Define CRS: using WGS84 (alternative: NAD83)
proj4string(data) <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84")


# Projection: using Albers Equal Area Conic projection system  
# https://zia207.github.io/geospatial-data-science.github.io/map-projection-coordinate-reference-systems.html#albers-equal-area-conic
centroid <- spTransform(data, CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 
                                  +datum=WGS84 +units=m +no_defs "))


# Below 5 lines are:
# 1/ convert centroid from "SpatialPointsDataFrame" to "data frame" 
# 2/ in the data frame, define axis "X" and "Y"
# 3/ set the circle buffer, generate Simple Feature Polygon for circle
# 4/ transfer Simple Feature back to Spatial Polygon
# 5/ convert CRS back to log-lat

centroid <- st_as_sf(centroid) 
dat_sf <- st_as_sf(centroid, coords = c("X", "Y"))
dat_circle <- st_buffer(dat_sf, dist = 3000)
centroid_circle <- as(dat_circle, 'Spatial')
circle <- spTransform(centroid_circle, CRS("+proj=longlat +ellps=WGS84 +datum=WGS84"))




######################################################################
##                                                                  ##
## STEP3: compute weighted weather var in each "circle" on each day ##
##        repeat for: tdmean; tmean; ppt                            ##
##                                                                  ##
######################################################################

setwd("data/raw data/weather/tdmean")

out.subfolder=list.files(getwd())
TT=NA
for (t in 1:length(out.subfolder)) {
  setwd("data/raw data/weather/tdmean")
  setwd(paste(getwd(), out.subfolder[t],sep="/"))
  file=list.files(getwd(), pattern="bil$", full.names=FALSE) 
  if(length(file)==0) {
    TT[t]=1
  } else {
    file.name=paste(regmatches(file, regexpr("p..", file)),regmatches(file, regexpr("20......", file)),sep="_")
    date.name=regmatches(file.name, regexpr("20......", file.name))
    day=substr(date.name , 7,8)
    month=substr(date.name , 5,6)
    year=substr(date.name , 1,4)
    date=as.Date(paste0(year,"-",month,"-",day))
    
    my.layer<- raster(paste0(file))
    var=rasterToPoints(my.layer, spatial=TRUE)
    var=as.data.frame(var)
    
    
    names(var)[1]="var"
    var$date=date
    var$ID=c(1:nrow(var))
    names(var)[2:3]=c("long","lat")
    rownames(var)=var$ID
    
    data1=data_initial["Plant_Code"]
    time=unique(var$date)
    centroid_var=var[c(1,2,3)]
    coordinates(centroid_var)=c("long","lat")
    
    proj4string(centroid_var)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84")
    centroid_var<- spTransform(centroid_var, CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs"))
    centroid_var_coord=as.data.frame(coordinates(centroid_var))
    
    # Set square coordinates
    square=cbind(centroid_var_coord$long-2000,centroid_var_coord$lat+2000, #NW corner
                 centroid_var_coord$long+2000,centroid_var_coord$lat+2000, #NE corner
                 centroid_var_coord$long+2000,centroid_var_coord$lat-2000, #SE corner
                 centroid_var_coord$long-2000,centroid_var_coord$lat-2000, #SW corner
                 centroid_var_coord$long-2000,centroid_var_coord$lat+2000 #NW corner again to close polygon
    )
    ID=var$ID
    # Create Polygon
    polys <- SpatialPolygons(mapply(function(poly, id) 
    {
      xy <- matrix(poly, ncol=2, byrow=TRUE)
      Polygons(list(Polygon(xy)), ID=id)
    }, 
    split(square, row(square)), ID),
    proj4string=CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs"))
    # Add weather data to polygon to create spatial polygon dataframe
    var_poly=SpatialPolygonsDataFrame(polys,var[1])
    # Transder var_poly to simple feature, this increase the calculation speed
    var_st=st_as_sf(var_poly)
    # Find the intersection part of well circle and weather cells
    gg = st_intersection(var_st,dat_circle)
    # Calculate the overlapping area size
    if(lengths(gg)[1]==0) {
      data1$var=NA
      data1$DATE=as.Date(time)
      data2=data1
    } else {
      area=gArea(as(gg,'Spatial'),byid = TRUE)
      # Construct a dataframe with well id and overlapping weather cells
      aa=as.data.frame(gg)
      aa$ID=rownames(aa)
      aa$area=area
      names(aa)[2]="Plant_Code"
      # calculate the aggregate well circle area covered by weather cells
      aa1=aggregate(aa[, 5], list(aa$Plant_Code), sum)
      names(aa1)[1]="Plant_Code"
      # merge aggregate area coverage to the dataframe, in order to calculate the weight for each weather variable
      aa2=merge(aa,aa1, by="Plant_Code",all.x=TRUE)
      names(aa2)[2]="var"
      # Calculate the weights
      aa2$weights=aa2$area/aa2$x
      # Calculate the weighted weather variable for each weather var cell
      aa2$weighted_var=aa2$var*aa2$weights
      # Calculate the weighted var for each well
      Weighted_var=aggregate(aa2[, 8], list(aa2$Plant_Code), sum)
      names(Weighted_var)[1]="Plant_Code"
      # Merge weighted var to the original well dataset
      data2=merge(data1,Weighted_var, by="Plant_Code",all.x=TRUE)
      names(data2)[2]="PPT"
      # Set time variable
      data2$DATE=as.Date(time)
      
      # Store the processed daily weather-plant merged datafile (there are too many data files, did not included in the replication package)
      setwd("data/processed data/tdmean_Plants")
      write.csv(data2,paste(time,".csv"))
    }
  }
}


#############################################################################################
# Run the following code separately from the previous codes
# Combine daily data into panel
#############################################################################################

setwd("data/processed data/tdmean_Plants")
out.file=list.files ()
data2=read.csv("2016-11-22 .csv")
data2=data2[-1]
all_data=data.frame(matrix(ncol = ncol(data2), nrow = 0))
names(all_data)=names(data2)
  for(n in 1:length(out.file)){
    # 
    daily_data=read.csv(out.file[n])
    all_data=rbind(all_data,daily_data)
    print(n)
  }

setwd("data/processed data")
write.csv(all_data,"all_tdmean.csv") # did not include in the replication package due to redundancy.



setwd("data/processed data")
tmean=read.csv("all_tmean.csv")
ppt=read.csv("all_ppt.csv")
tdmean=read.csv("all_tdmean.csv")
all_weather=left_join(tmean,ppt, by=c("Plant_Code","DATE"))
write.csv(all_weather,"all_weather.csv")


